This document contains analysis of differences in microbial alpha diversity between healthy samples from American Gut Project and samples with diagnosed Inflammatory bowel syndrome (Ulcerative colitis and Crohn’s disease) and Clostridium difficile infection, as well as samples after fecal microbiota transplantation.

The aim of this analysis is to show that there are significant differences between microbiome alpha diversity in healthy and disease samples. Furthermore, we want to show that fecal transplantation improves alpha diversity in short- and long-term.

Loading libraries:

#install.packages("readr")
library("readr")
#install.packages('data.table')
library(data.table)
library(stringr)
#install.packages('ggplot2')
library(ggplot2)
library(gridExtra)
library(grid)
library(plyr)
library(purrr)
library(dplyr)
#install.packages('flextable')
library(flextable)
library(tibble)
library(ComplexHeatmap)
library(RColorBrewer)
library(ggplotify)
library(here)
library(tableone)

#install.packages("table1")
library(table1)

Load healthy subsample of AGP data:

# Load healthy samples' table
all_healthy <- read.csv(here("01_tidy_data", "AGP_healthy.csv.gz"), header = TRUE, sep = ",")

all_healthy <- dplyr::select(all_healthy, sample_id, shannon_entropy, chao1, menhinick, margalef, fisher_alpha, simpson, pielou_evenness, gini_index, strong, simpson, faith_pd, sex, race, host_age, condition)

names(all_healthy)[names(all_healthy) == 'host_age'] <- 'age'

Load Inflammatory Bowel Disease data:

IBD <- read.csv(here("01_tidy_data", "IBD.csv.gz"), header = TRUE, sep = ",")

Load Ulcerative colitis data:

UC <- read.csv(here("01_tidy_data", "UC.csv.gz"), header = TRUE, sep = ",")

Load Longitudinal Chron’s disease study data:

CD <- read.csv(here("01_tidy_data", "CD_2.csv.gz"), header = TRUE, sep = ",")

Load CDI data from NCBI data

CDI <- read.csv(here("01_tidy_data", "ncbi_CDI.csv.gz"), header = TRUE, sep = ",")

Load Changes following fecal microbial transplantation for recurrent CDI data:

CDI_FMT <- read.csv(here("01_tidy_data", "CDI_FMT.csv.gz"), header = TRUE, sep = ",")

Load Fecal transplant - CDI with underlying IBD data:

FMT_IBD_CDI <- read.csv(here("01_tidy_data", "FMT_IBD_CDI.csv.gz"), header = TRUE, sep = ",")

Load Hospital Clinic’s CDI data

hospital_CDI <- read.csv(here("01_tidy_data", "hosp_CDI.csv.gz"), header = TRUE, sep = ",")

Let’s define vector of names of the alpha diversity metrics that are going to be analysed:

metric <- c("chao1", "margalef", "menhinick", "fisher_alpha", "faith_pd", "gini_index", "strong", "pielou_evenness", "shannon_entropy", "simpson") 

Table 1

The summary of the data contained in all datasets (treating all metrics as not normally distributed):

all_data <- rbind.fill(all_healthy, IBD, UC, CD, CDI, CDI_FMT, FMT_IBD_CDI)
  
all_data <- dplyr::select(all_data, shannon_entropy, chao1, menhinick, margalef, fisher_alpha, simpson, gini_index, strong, pielou_evenness, faith_pd, condition)

all_data$condition[all_data$condition=="control"] <- "healthy"
all_data$condition[all_data$condition=="crohns"] <- "CD"
all_data$condition[all_data$condition=="Cdif"] <- "CDI"
all_data <- all_data %>% filter (condition != "Donors")

table_one <- CreateTableOne(data = all_data) 
print(table_one, nonnormal = metric)
##                                 
##                                  Overall                
##   n                                2294                 
##   shannon_entropy (median [IQR])   4.49 [3.60, 5.25]    
##   chao1 (median [IQR])           172.14 [121.51, 228.93]
##   menhinick (median [IQR])         1.95 [1.35, 2.53]    
##   margalef (median [IQR])         16.38 [11.41, 21.25]  
##   fisher_alpha (median [IQR])     26.74 [17.32, 36.79]  
##   simpson (median [IQR])           0.90 [0.81, 0.94]    
##   gini_index (median [IQR])        1.00 [1.00, 1.00]    
##   strong (median [IQR])            0.69 [0.65, 0.74]    
##   pielou_evenness (median [IQR])   0.64 [0.54, 0.71]    
##   faith_pd (median [IQR])         16.81 [12.85, 21.06]  
##   condition (%)                                         
##      CD                             319 (13.9)          
##      CDI                            100 ( 4.4)          
##      CDI + CD                         6 ( 0.3)          
##      CDI + UC                         6 ( 0.3)          
##      healthy                       1823 (79.5)          
##      UC                              40 ( 1.7)
all_data$condition <- as.factor(all_data$condition)
all_data$condition <- ordered(all_data$condition, levels = c("healthy", "CD", "UC", "CDI", "CDI + CD", "CDI + UC"))
table1(~ chao1 + margalef + menhinick + fisher_alpha + faith_pd + gini_index + strong + pielou_evenness + shannon_entropy + simpson | condition, data=all_data)
healthy
(N=1823)
CD
(N=319)
UC
(N=40)
CDI
(N=100)
CDI + CD
(N=6)
CDI + UC
(N=6)
Overall
(N=2294)
chao1
Mean (SD) 194 (73.6) 128 (68.8) 106 (55.3) 61.6 (38.7) 54.5 (38.2) 70.5 (53.7) 177 (80.1)
Median [Min, Max] 188 [20.0, 516] 124 [8.00, 361] 103 [39.0, 318] 52.4 [7.00, 199] 43.2 [17.0, 119] 63.3 [17.2, 137] 172 [7.00, 516]
margalef
Mean (SD) 18.1 (6.62) 12.3 (6.55) 10.8 (5.20) 6.33 (3.63) 4.95 (3.21) 6.91 (4.99) 16.6 (7.21)
Median [Min, Max] 17.6 [2.11, 42.9] 12.7 [0.869, 28.2] 10.5 [4.16, 29.0] 5.55 [0.686, 20.1] 4.31 [1.25, 10.5] 6.62 [2.00, 13.1] 16.4 [0.686, 42.9]
menhinick
Mean (SD) 2.15 (0.805) 1.42 (0.800) 1.59 (1.02) 0.803 (0.551) 0.742 (0.469) 1.03 (0.729) 1.97 (0.878)
Median [Min, Max] 2.08 [0.269, 5.18] 1.41 [0.108, 4.05] 1.44 [0.537, 6.36] 0.626 [0.0885, 2.96] 0.648 [0.201, 1.55] 0.986 [0.310, 1.94] 1.95 [0.0885, 6.36]
fisher_alpha
Mean (SD) 30.9 (13.9) 19.6 (11.9) 17.8 (12.4) 9.08 (6.46) 6.95 (5.22) 10.5 (8.44) 28.0 (14.6)
Median [Min, Max] 29.2 [2.50, 90.9] 19.5 [1.02, 50.9] 16.2 [5.34, 75.8] 7.33 [0.778, 36.7] 5.68 [1.44, 16.3] 9.77 [2.38, 21.4] 26.7 [0.778, 90.9]
faith_pd
Mean (SD) 18.0 (5.37) 14.8 (6.58) 14.3 (10.4) 6.22 (3.14) 6.71 (3.10) 7.85 (3.47) 16.9 (6.19)
Median [Min, Max] 17.6 [3.92, 36.0] 14.0 [2.82, 40.1] 11.3 [6.06, 68.0] 5.36 [1.30, 17.2] 6.04 [2.88, 11.9] 7.63 [4.52, 12.2] 16.8 [1.30, 68.0]
gini_index
Mean (SD) 0.999 (0.00160) 0.996 (0.00513) 0.983 (0.00986) 0.987 (0.00661) 0.994 (0.00285) 0.992 (0.00466) 0.998 (0.00447)
Median [Min, Max] 1.00 [0.991, 1.00] 0.997 [0.962, 1.00] 0.983 [0.950, 0.996] 0.988 [0.966, 0.998] 0.994 [0.990, 0.998] 0.993 [0.984, 0.997] 1.00 [0.950, 1.00]
strong
Mean (SD) 0.697 (0.0723) 0.710 (0.0646) 0.660 (0.0622) 0.689 (0.0726) 0.713 (0.0421) 0.686 (0.0797) 0.697 (0.0714)
Median [Min, Max] 0.685 [0.501, 0.955] 0.707 [0.520, 0.861] 0.649 [0.577, 0.832] 0.694 [0.475, 0.887] 0.714 [0.670, 0.786] 0.696 [0.573, 0.777] 0.688 [0.475, 0.955]
pielou_evenness
Mean (SD) 0.615 (0.142) 0.579 (0.126) 0.648 (0.102) 0.564 (0.127) 0.499 (0.123) 0.600 (0.0922) 0.608 (0.139)
Median [Min, Max] 0.650 [0.0367, 0.841] 0.599 [0.174, 0.832] 0.676 [0.340, 0.770] 0.574 [0.104, 0.792] 0.549 [0.287, 0.611] 0.601 [0.491, 0.712] 0.642 [0.0367, 0.841]
shannon_entropy
Mean (SD) 4.47 (1.22) 3.83 (1.23) 4.15 (0.943) 3.19 (0.975) 2.71 (0.961) 3.22 (0.820) 4.31 (1.25)
Median [Min, Max] 4.70 [0.187, 7.02] 3.97 [0.679, 6.12] 4.41 [1.92, 5.70] 3.25 [0.431, 5.61] 2.94 [1.18, 3.60] 3.29 [2.12, 4.48] 4.49 [0.187, 7.02]
simpson
Mean (SD) 0.850 (0.160) 0.815 (0.144) 0.867 (0.0924) 0.764 (0.154) 0.708 (0.178) 0.810 (0.0781) 0.841 (0.158)
Median [Min, Max] 0.907 [0.0309, 0.984] 0.857 [0.226, 0.976] 0.905 [0.575, 0.961] 0.801 [0.110, 0.960] 0.756 [0.403, 0.875] 0.815 [0.679, 0.906] 0.898 [0.0309, 0.984]

Let’s define function for plotting violin plots that we are going to use in the whole analysis:

plot_violin <- function(df, column){
  violin <- vector('list', length(metric))
  
  for (i in 1:length(metric)){
    mean_line <- df %>% dplyr::group_by(.data[[column]]) %>% dplyr::summarise(grp_mean=mean(.data[[metric[i]]]))
    plot_data <- df %>% dplyr::group_by(.data[[column]]) %>% dplyr::mutate(m = mean(.data[[metric[i]]])) 
  
    violin[[i]] <- plot_data %>% ggplot(aes(x = .data[[metric[i]]], y = reorder(.data[[column]], -m), color = .data[[column]], fill = .data[[column]])) +
      geom_violin()+
      geom_boxplot(width=0.1, color="grey", alpha=0.2) +
      geom_vline(data = mean_line, aes(xintercept = grp_mean, color = .data[[column]]), linetype = "dashed")+ 
      labs(x = metric[i])+
      ylab("")+
      theme(legend.position="none") 
    
    if(metric[i] != "shannon_entropy" & metric[i] != "strong" & metric[i] != "gini_index"  & metric[i] != "menhinick"){
     violin[[i]] <- violin[[i]] + 
       scale_x_continuous(trans = 'log10') +
       xlab(paste(metric[i], "(log10)", sep = " ")) 
    }
  }
  return(violin)
}

Function for doing Mann-Whitney-Wilcoxon test:

do_wilcox_test <- function(df, column){
  test <- list()
  
  for (i in 1:length(metric)){
    test[[i]] <- pairwise.wilcox.test(df[[metric[i]]], df[[column]], p.adjust.method="none") %>% 
    broom::tidy() %>% add_column(parameter = metric[i], .before='group1')
    test[[i]]$p.value <- round(test[[i]]$p.value, digits = 5)
  }
  
  tests <- do.call(what = rbind, args = test)
  
  return(tests)
}

IBD and UC samples

Distributions of metrics in disease datasets

table(IBD$condition)
## 
## CD UC 
## 26  7
table(UC$condition)
## 
## UC 
## 33
#merge two datasets
UC_check <- UC 
UC_check$condition <- "UC_2"
IBD_check <-  rbind.fill(IBD, UC_check)

IBD_check$condition <- as.factor(IBD_check$condition)

table(IBD_check$condition)
## 
##   CD   UC UC_2 
##   26    7   33
nrow(IBD_check)
## [1] 66
violin_IBD_check <- vector('list', length(metric))

# Use violin function
violin_IBD_check <- plot_violin(IBD_check, "condition")

grid.arrange(violin_IBD_check[[1]], violin_IBD_check[[2]],violin_IBD_check[[3]], violin_IBD_check[[4]],violin_IBD_check[[5]], violin_IBD_check[[6]],violin_IBD_check[[7]], violin_IBD_check[[8]],violin_IBD_check[[9]], violin_IBD_check[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index  2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in IBD datasets", gp=gpar(fontsize=10,font=2))) 

Mann-Whitney-Wilcoxon Test

tests_IBD_check <- list()

tests_IBD_check <- do_wilcox_test(IBD_check, "condition")

table <- tests_IBD_check %>% 
  add_column(p.adjusted = round(p.adjust(tests_IBD_check$p.value, "fdr"), digits=5), .after='p.value') %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions")

table

Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions

parameter

group1

group2

p.value

p.adjusted

chao1

UC

CD

0.39893

0.85485

chao1

UC_2

CD

0.08591

0.26241

chao1

UC_2

UC

0.07474

0.26241

margalef

UC

CD

0.94732

0.94854

margalef

UC_2

CD

0.20232

0.50580

margalef

UC_2

UC

0.28549

0.65882

menhinick

UC

CD

0.94732

0.94854

menhinick

UC_2

CD

0.00000

0.00000

menhinick

UC_2

UC

0.00042

0.00315

fisher_alpha

UC

CD

0.94732

0.94854

fisher_alpha

UC_2

CD

0.02336

0.10011

fisher_alpha

UC_2

UC

0.08747

0.26241

faith_pd

UC

CD

0.94854

0.94854

faith_pd

UC_2

CD

0.00000

0.00000

faith_pd

UC_2

UC

0.00000

0.00000

gini_index

UC

CD

0.81286

0.94854

gini_index

UC_2

CD

0.51926

0.91634

gini_index

UC_2

UC

0.67589

0.94854

strong

UC

CD

0.81286

0.94854

strong

UC_2

CD

0.00154

0.00924

strong

UC_2

UC

0.00810

0.04050

pielou_evenness

UC

CD

0.50339

0.91634

pielou_evenness

UC_2

CD

0.14154

0.38602

pielou_evenness

UC_2

UC

0.70161

0.94854

shannon_entropy

UC

CD

0.94854

0.94854

shannon_entropy

UC_2

CD

0.68799

0.94854

shannon_entropy

UC_2

UC

0.83458

0.94854

simpson

UC

CD

0.50339

0.91634

simpson

UC_2

CD

0.92156

0.94854

simpson

UC_2

UC

0.86186

0.94854

Based on Mann-Whitney-Wilcoxon test, alpha diversity of UC samples from second study and CD are significantly different for Menhinick’s index, Fisher’s alpha (?), Faith’s PD and Strong’s evenness. Also UC samples from first and second study are significantly different for Menhinick’s index, Faith’s PD and Strong’s evenness. We can look on these samples as one population of IBD. It seems that UC samples from second study expres lower richness and evenness that the ones (7) from the first study.

Healthy samples vs IBD samples

#merge two datasets
healthy_disease <-  rbind.fill(all_healthy, IBD, UC)

healthy_disease$condition <- as.factor(healthy_disease$condition)
healthy_disease$condition <- relevel(healthy_disease$condition, "healthy")

table(healthy_disease$condition)
## 
## healthy      CD      UC 
##    1470      26      40

Explore differences in distribution shape and mean values of groups with different conditions by ploting violin plot.

violin_IBD <- vector('list', length(metric))

# Use violin function
violin_IBD <- plot_violin(healthy_disease, "condition")

grid.arrange(violin_IBD[[1]], violin_IBD[[2]],violin_IBD[[3]], violin_IBD[[4]],violin_IBD[[5]], violin_IBD[[6]],violin_IBD[[7]], violin_IBD[[8]],violin_IBD[[9]], violin_IBD[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index  2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in IBD datasets", gp=gpar(fontsize=10,font=2))) 

Mann-Whitney-Wilcoxon Test

Test whether different conditions separate into distinct distributions with Mann-Whitney-Wilcoxon test:

tests_IBD <- list()

tests_IBD <- do_wilcox_test(healthy_disease, "condition")

table1 <- tests_IBD %>% 
  add_column(p.adjusted = round(p.adjust(tests_IBD$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1=="CD") %>%
  filter(group2=="healthy") %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions")

table2 <- tests_IBD %>% 
  add_column(p.adjusted = round(p.adjust(tests_IBD$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1=="UC") %>%
  filter(group2=="healthy") %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions")


table1

Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions

parameter

group1

group2

p.value

p.adjusted

chao1

CD

healthy

0.00000

0.00000

margalef

CD

healthy

0.00000

0.00000

faith_pd

CD

healthy

0.00000

0.00000

gini_index

CD

healthy

0.00000

0.00000

strong

CD

healthy

0.00000

0.00000

fisher_alpha

CD

healthy

0.00007

0.00015

pielou_evenness

CD

healthy

0.00831

0.01558

menhinick

CD

healthy

0.07199

0.10798

shannon_entropy

CD

healthy

0.14985

0.20434

simpson

CD

healthy

0.75357

0.76939

There is significant difference between healthy samples and CD samples in regard to Faith PD, Gini index, Chao1, Margalef’s index and Fisher alpha. No significant difference was detected for Pielou evenness, Menhinick, Strong, Shannon entropy and Simpson’s index.

table2

Results of the Mann-Whitney-Wilcoxon test for distributions of different conditions

parameter

group1

group2

p.value

p.adjusted

chao1

UC

healthy

0.00000

0.00000

margalef

UC

healthy

0.00000

0.00000

menhinick

UC

healthy

0.00000

0.00000

fisher_alpha

UC

healthy

0.00000

0.00000

faith_pd

UC

healthy

0.00000

0.00000

gini_index

UC

healthy

0.00000

0.00000

strong

UC

healthy

0.00072

0.00144

shannon_entropy

UC

healthy

0.01894

0.03157

pielou_evenness

UC

healthy

0.20476

0.26708

simpson

UC

healthy

0.48789

0.56295

It seems like there is significant difference between healthy samples and UC samples in all alpha metrics except from Shannon entropy, Strong’s evennes, Simpsons index and Pielou evenness (almost all of the “evenness” metrics except from Gini). “Richness” indices are all signifficantly different.

Now lets see which parameters show significant difference between healthy and all IBD samples:

wilcox_healthy_disease <- healthy_disease %>%
  summarise(Chao1 = wilcox.test(chao1[condition == "healthy"], chao1[condition != "healthy"])$p.value,
            Margalef = wilcox.test(margalef[condition == "healthy"], margalef[condition != "healthy"])$p.value,
            Menhinick = wilcox.test(menhinick[condition == "healthy"], menhinick[condition != "healthy"])$p.value,
            Fisher = wilcox.test(fisher_alpha[condition == "healthy"], fisher_alpha[condition != "healthy"])$p.value,
            Faith = wilcox.test(faith_pd[condition == "healthy"], faith_pd[condition != "healthy"])$p.value,
            Gini = wilcox.test(gini_index[condition == "healthy"], gini_index[condition != "healthy"])$p.value,
            Strong = wilcox.test(strong[condition == "healthy"], strong[condition != "healthy"])$p.value,
            Pielou = wilcox.test(pielou_evenness[condition == "healthy"], pielou_evenness[condition != "healthy"])$p.value,
            Shannon = wilcox.test(shannon_entropy[condition == "healthy"], shannon_entropy[condition != "healthy"])$p.value,
            Sipson = wilcox.test(simpson[condition == "healthy"], simpson[condition != "healthy"])$p.value) 

wilcox_healthy_disease <- t(wilcox_healthy_disease)
colnames(wilcox_healthy_disease) <- c("p.value")
wilcox_healthy_disease <- data.frame(alpha_metric = row.names(wilcox_healthy_disease), wilcox_healthy_disease)
wilcox_healthy_disease$p.value <- round(wilcox_healthy_disease$p.value, digits = 5)

wilcox_healthy_disease %>%
  add_column(p.adjusted = round(p.adjust(wilcox_healthy_disease$p.value, "fdr"), digits = 5), .after='p.value') %>%
  arrange(p.value)  %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
  bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of alpha diversity indices between healthy and IBD samples")

Results of the Mann-Whitney-Wilcoxon test for distributions of alpha diversity indices between healthy and IBD samples

alpha_metric

p.value

p.adjusted

Chao1

0.00000

0.00000

Margalef

0.00000

0.00000

Fisher

0.00000

0.00000

Gini

0.00000

0.00000

Strong

0.00000

0.00000

Menhinick

0.00247

0.00412

Shannon

0.00686

0.00980

Pielou

0.00896

0.01120

Faith

0.16918

0.18798

Sipson

0.46537

0.46537

There is significant difference between healthy samples and those that are not healthy in all alpha metrics except from Pielou evenness, Simpson’s index and Strong’s evenness.

Kruskal-Wallis Test

Kruskal-Wallis Testis a non-parametric method for testing whether samples originate from the same distribution. It is used for comparing two or more independent samples of equal or different sample sizes. It extends the Mann–Whitney U test, which is used for comparing only two groups (source: Wikipedia)

kruskal_results <- healthy_disease %>%
  summarise(Chao1 = kruskal.test(healthy_disease$chao1 ~ healthy_disease$condition)$p.value,
            Margalef = kruskal.test(healthy_disease$margalef ~ healthy_disease$condition)$p.value,
            Menhinick = kruskal.test(healthy_disease$menhinick ~ healthy_disease$condition)$p.value,
            Fisher = kruskal.test(healthy_disease$fisher_alpha ~ healthy_disease$condition)$p.value,
            Faith = kruskal.test(healthy_disease$faith_pd ~ healthy_disease$condition)$p.value,
            Gini = kruskal.test(healthy_disease$gini_index ~ healthy_disease$condition)$p.value,
            Strong = kruskal.test(healthy_disease$strong ~ healthy_disease$condition)$p.value,
            Pielou = kruskal.test(healthy_disease$pielou_evenness ~ healthy_disease$condition)$p.value,
            Shannon = kruskal.test(healthy_disease$shannon_entropy ~ healthy_disease$condition)$p.value,
            Sipson = kruskal.test(healthy_disease$simpson ~ healthy_disease$condition)$p.value)

kruskal_results_df <- as.data.frame(t(kruskal_results))
colnames(kruskal_results_df) <- c("p.value")
kruskal_results_df <- data.frame(alpha_metric = row.names(kruskal_results_df), kruskal_results_df)
kruskal_results_df$p.value <- round(kruskal_results_df$p.value, digits = 5)


kruskal_results_df %>% 
  add_column(p.adjusted = round(p.adjust(kruskal_results_df$p.value, "fdr"), digits = 5), .after='p.value') %>%
  arrange(p.value)  %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
   bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Kruskal-Wallis test for differentiation of alpha diversity indices across different conditions")

Results of the Kruskal-Wallis test for differentiation of alpha diversity indices across different conditions

alpha_metric

p.value

p.adjusted

Chao1

0.00000

0.00000

Margalef

0.00000

0.00000

Menhinick

0.00000

0.00000

Fisher

0.00000

0.00000

Faith

0.00000

0.00000

Gini

0.00000

0.00000

Strong

0.00000

0.00000

Pielou

0.01456

0.01820

Shannon

0.02423

0.02692

Sipson

0.75092

0.75092

Kruskal-Wallis test shows similar results as Mann-Withney-Wilcoxon test, except that in this test Shannon entropy is also not significantly different in healthy vs unhealthy comparison. The downside of this analysis is the small sample size of IBD dataset (UC and CD).

Longitudinal Chron’s disease analysis

Since previous analysis consisted from small sample size for IBD dataset and Chron’s disease samples showed unexpectedly high alpha diversity in various indexes, lets incorporate another study, this time only with CD patients. The sample size of this data set is 646. The number of patients and controls is balanced and controls are close relatives of patients. Each patient was sampled multiple times during the period of 6 weeks. Some patients previously underwent a surgical intervention on some part of their digestive system.

table(CD$description, CD$condition)
##                           
##                            control crohns
##   father family 01-00010        27      0
##   father family 01-00011        20      0
##   father family 01-00012        54      0
##   father family 01-00013        27      0
##   fecal sample index pt 20       0     18
##   fecal sample index pt 21       0     18
##   fecal sample index pt 23       0     17
##   fecal sample index pt 24       0     23
##   fecal sample index pt 25       0     13
##   fecal sample index pt 26       0     16
##   fecal sample index pt 27       0     10
##   fecal sample index pt 31       0     10
##   fecal sample index pt 32       0     18
##   fecal sample index pt 33       0     19
##   fecal sample index pt 35       0     11
##   fecal sample index pt 36       0      5
##   fecal sample index pt 37       0     21
##   fecal sample index pt 39       0     21
##   index family 01-00012          0     47
##   index family 01-00013          0     26
##   mother family 01-00010        25      0
##   mother family 01-00011        23      0
##   mother family 01-00012        57      0
##   mother family 01-00013        27      0
##   sibling family 01-00010       18      0
##   sibling family 01-00012       55      0
##   sibling family 01-00013       20      0
table(CD$condition)
## 
## control  crohns 
##     353     293
table(CD$surgery_and_ibd)
## 
##          control           crohns crohns (surgery) 
##              353              159              134

Distribution and mean differences between conditions

Lets do Violin plot to see the difference in alpha diversity between cases and controls in this study:

violin_CDa <- vector('list', length(metric))

# Use violin function
violin_CDa <- plot_violin(CD, "condition")

grid.arrange(violin_CDa[[1]], violin_CDa[[2]], violin_CDa[[3]], violin_CDa[[4]], violin_CDa[[5]], violin_CDa[[6]], violin_CDa[[7]],  violin_CDa[[9]], violin_CDa[[10]], ncol=4, top = textGrob("Distributions of 1) Shannon's index  2) Chao1 3) Menhinick's index \n4) Margalef's index 5) Fisher's index 6) Simpson 7) Gini index 8) Strong's index 9) Pielou's evenness \nand 10) Faith's PD in longitudinal CD datasets", gp=gpar(fontsize=10,font=2))) 

Lets now compare alpha diversity between cases and controls but taking into account if they underwent surgery:

violin_CDb <- vector('list', length(metric))

# Use violin function
violin_CDb <- plot_violin(CD, "surgery_and_ibd")

#violin_CDb

grid.arrange(violin_CDb[[1]], violin_CDb[[2]], violin_CDb[[3]], violin_CDb[[4]], violin_CDb[[5]], violin_CDb[[6]], violin_CDb[[7]], violin_CDb[[8]], violin_CDb[[9]], violin_CDb[[10]], ncol=3) 

Lets plot differences in effect of different surgical interventions:

violin_CD_surg <- vector('list', length(metric))

# Use violin function
violin_CD_surg <- plot_violin(CD, "surgery_type")

violin_CD_surg
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

Finally, lets compare cases and control from this study with cases from previous IBD studies and AGP controls:

CD_1 <- healthy_disease[healthy_disease$condition != "UC",]

CD_surg <- CD
CD_surg$condition <- NULL
names(CD_surg)[names(CD_surg) == 'surgery_and_ibd'] <- 'condition'

CD_check <- rbind.fill(CD_1, CD_surg)

CD_check$condition[CD_check$condition == "CD"] <-'CD_1'
CD_check$condition[CD_check$condition == "crohns"] <-'CD_2'
CD_check$condition[CD_check$condition == "crohns (surgery)"] <-'CD_surgery'
CD_check$condition[CD_check$condition == "healthy"] <-'control(AGP)'
CD_check$condition[CD_check$condition == "control"] <-'control_2'
violin_CD_check <- vector('list', length(metric))

# Use violin function
violin_CD_check <- plot_violin(CD_check, "condition")

violin_CD_check
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

Here we can see that alpha diversity of CD samples from previous data set have similar mean values as CD samples for all alpha indexes except from Faith’s PD which is much higher and Gini index which is much lower than in this longitudinal study. On the other side, CD samples with past surgery had much lower mean in all measures.

This probably mean that high diversity of CD samples is not a mistake, those are probably just samples with better clinical status.

When it comes to samples with previous surgery, it is unclear whether the lower alpha diversity comes from the severity of CD symptoms or as a direct consequence of surgery (no info about the time that passed after surgery).

test_CD_1 <- list()
test_CD <- list()

test_CD_1 <- do_wilcox_test(CD_1, "condition")
test_CD <- do_wilcox_test(CD, "surgery_and_ibd")

table_CD_1 <- test_CD_1 %>% 
  add_column(p.adjusted = round(p.adjust(test_CD_1$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Crohns vs controls in CD_1 dataset")

table_CD_2 <- test_CD %>% 
  add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 == "crohns")  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Crohns vs controls in CD dataset")

table_CD_3 <- test_CD %>% 
  add_column(p.adjusted = round(p.adjust(test_CD$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value)  %>%
  filter(group1 != "crohns" & group2 == "control")  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Crohns (surgery) vs controls in CD dataset")

Results of the Mann-Whitney-Wilcoxon test for diversity distributions of healthy and CD samples in first study:

table_CD_1

Crohns vs controls in CD_1 dataset

parameter

group1

group2

p.value

p.adjusted

chao1

CD

healthy

0.00000

0.00000

faith_pd

CD

healthy

0.00000

0.00000

gini_index

CD

healthy

0.00000

0.00000

margalef

CD

healthy

0.00000

0.00000

strong

CD

healthy

0.00000

0.00000

fisher_alpha

CD

healthy

0.00007

0.00012

pielou_evenness

CD

healthy

0.00831

0.01187

menhinick

CD

healthy

0.07199

0.08999

shannon_entropy

CD

healthy

0.14985

0.16650

simpson

CD

healthy

0.75357

0.75357

This table shows that, in first study, half of the indices don’t show significant difference between healthy and CD samples (Pielou, Menhinick, Strong, Shannon and Simpson).

Results of the Mann-Whitney-Wilcoxon test for diversity distributions of healthy and CD samples in second (longitudinal) study:

table_CD_2

Crohns vs controls in CD dataset

parameter

group1

group2

p.value

p.adjusted

simpson

crohns

control

0.00007

0.00010

pielou_evenness

crohns

control

0.00010

0.00014

shannon_entropy

crohns

control

0.00067

0.00091

chao1

crohns

control

0.00074

0.00097

strong

crohns

control

0.00087

0.00109

gini_index

crohns

control

0.00926

0.01068

faith_pd

crohns

control

0.04426

0.04918

margalef

crohns

control

0.06702

0.06702

menhinick

crohns

control

0.06702

0.06702

fisher_alpha

crohns

control

0.06702

0.06702

Helathy and CD samples without surgery are different in all except three alpha diversity indices: Margalef, Menhinick and Fisher alpha.

table_CD_3

Crohns (surgery) vs controls in CD dataset

parameter

group1

group2

p.value

p.adjusted

chao1

crohns (surgery)

control

0

0

margalef

crohns (surgery)

control

0

0

menhinick

crohns (surgery)

control

0

0

fisher_alpha

crohns (surgery)

control

0

0

faith_pd

crohns (surgery)

control

0

0

gini_index

crohns (surgery)

control

0

0

strong

crohns (surgery)

control

0

0

pielou_evenness

crohns (surgery)

control

0

0

shannon_entropy

crohns (surgery)

control

0

0

simpson

crohns (surgery)

control

0

0

All metrics show significant difference between healthy and Crohn’s samples that undergone some type of surgical procedure.

CD_check_w <- CD_check %>% filter(CD_check$condition != "control(AGP)" & CD_check$condition != "control_2" )

test_CD_3 <- list()

test_CD_3 <- do_wilcox_test(CD_check_w, "condition")

table_CD_3 <- test_CD_3 %>% 
  add_column(p.adjusted = round(p.adjust(test_CD_3$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  filter(group1 != "CD_surgery")  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients")

table_CD_3

Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients

parameter

group1

group2

p.value

p.adjusted

faith_pd

CD_2

CD_1

0.00000

0.00000

gini_index

CD_2

CD_1

0.00000

0.00000

menhinick

CD_2

CD_1

0.00000

0.00000

strong

CD_2

CD_1

0.00000

0.00000

pielou_evenness

CD_2

CD_1

0.00015

0.00024

chao1

CD_2

CD_1

0.00586

0.00732

margalef

CD_2

CD_1

0.01065

0.01278

simpson

CD_2

CD_1

0.22596

0.24210

fisher_alpha

CD_2

CD_1

0.42371

0.43832

shannon_entropy

CD_2

CD_1

0.68844

0.68844

test_CD_check <- list()

test_CD_check <- do_wilcox_test(CD_check, "condition")

test_CD_check %>% 
  add_column(p.adjusted = round(p.adjust(test_CD_check$p.value, "fdr"), digits=5), .after='p.value') %>%
  filter(group1 == "control(AGP)" & group2 == "control_2") %>%
  arrange(p.value, parameter)  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "AGP vs controls in CD_2 dataset")

AGP vs controls in CD_2 dataset

parameter

group1

group2

p.value

p.adjusted

fisher_alpha

control(AGP)

control_2

0.00000

0.00000

gini_index

control(AGP)

control_2

0.00000

0.00000

margalef

control(AGP)

control_2

0.00000

0.00000

menhinick

control(AGP)

control_2

0.00000

0.00000

chao1

control(AGP)

control_2

0.00001

0.00002

faith_pd

control(AGP)

control_2

0.00045

0.00066

simpson

control(AGP)

control_2

0.20806

0.22615

pielou_evenness

control(AGP)

control_2

0.41350

0.43526

shannon_entropy

control(AGP)

control_2

0.52903

0.54539

strong

control(AGP)

control_2

0.77432

0.77432

This table confirms that CD samples (without surgery) from two studies significantly differ in all except from Simpson, Fisher and Shannon index. Also healthy controls from longitudinal study are significantly different from AGP healthy population in all metrics except Pielou, Simpson and Shannon index. This tells us that we probably shouldn’t mix these datasets since they show different trends. The reason is probably the heterogeneity of IBD conditions and severity level which is not properly annotated.

Now, lets merge all data sets with IBD and AGP as control:

CD_merge <- CD %>%
  filter(condition != "not applicable")

CD_merge$condition[CD_merge$condition=="control"] <- "healthy"
CD_merge$condition[CD_merge$condition=="crohns"] <- "CD"

# healthy_disease_2 <- rbind.fill(healthy_disease, before_trans, CD_merge)
healthy_disease_2 <- rbind.fill(all_healthy, IBD, UC, CD_merge)

# Sizes of each dataset
table(healthy_disease_2$condition)
## 
##      CD healthy      UC 
##     319    1823      40
# Do Mann-Whitney-Wilcoxon test to see which parameters show significant differenc between healthy and unhealthy samples
wilcox_p_value <- healthy_disease_2 %>%
  summarise(Shannon = wilcox.test(shannon_entropy[condition == "healthy"], shannon_entropy[condition != "healthy"])$p.value,
            Chao1 = wilcox.test(chao1[condition == "healthy"], chao1[condition != "healthy"])$p.value,
            Menhinick = wilcox.test(menhinick[condition == "healthy"], menhinick[condition != "healthy"])$p.value,
            Margalef = wilcox.test(margalef[condition == "healthy"], margalef[condition != "healthy"])$p.value,
            Simpson = wilcox.test(simpson[condition == "healthy"], simpson[condition != "healthy"])$p.value,
            Fisher = wilcox.test(fisher_alpha[condition == "healthy"], fisher_alpha[condition != "healthy"])$p.value,
            Pielou = wilcox.test(pielou_evenness[condition == "healthy"], pielou_evenness[condition != "healthy"])$p.value,
            Gini = wilcox.test(gini_index[condition == "healthy"], gini_index[condition != "healthy"])$p.value,
            Strong = wilcox.test(strong[condition == "healthy"], strong[condition != "healthy"])$p.value,
            Faith = wilcox.test(faith_pd[condition == "healthy"], faith_pd[condition != "healthy"])$p.value) 

wilcox_p_value <- t(wilcox_p_value)
colnames(wilcox_p_value) <- c("p.value")
wilcox_p_value <- data.frame(Alpha_Metric = row.names(wilcox_p_value), wilcox_p_value)
wilcox_p_value$p.value <- round(wilcox_p_value$p.value, digits = 5)

wilcox_p_value %>%
  add_column(p.adjusted = round(p.adjust(wilcox_p_value$p.value, "fdr"), digits=5), .after='p.value') %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
  bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different parameters between healthy and unhealthy samples")

Results of the Mann-Whitney-Wilcoxon test for distributions of different parameters between healthy and unhealthy samples

Alpha_Metric

p.value

p.adjusted

Shannon

0.00000

0.00000

Chao1

0.00000

0.00000

Menhinick

0.00000

0.00000

Margalef

0.00000

0.00000

Simpson

0.00000

0.00000

Fisher

0.00000

0.00000

Pielou

0.00000

0.00000

Gini

0.00000

0.00000

Strong

0.00071

0.00071

Faith

0.00000

0.00000

Kruskal-Wallis test

kruskal_results <- healthy_disease_2 %>%
  summarise(Shannon = kruskal.test(healthy_disease_2$shannon_entropy ~ healthy_disease_2$condition)$p.value,
  Chao1 = kruskal.test(healthy_disease_2$chao1 ~ healthy_disease_2$condition)$p.value,
  Fisher = kruskal.test(healthy_disease_2$fisher_alpha ~ healthy_disease_2$condition)$p.value,
  Margalef =kruskal.test(healthy_disease_2$margalef ~ healthy_disease_2$condition)$p.value,
  Simpson = kruskal.test(healthy_disease_2$simpson ~ healthy_disease_2$condition)$p.value,
  Menhinick = kruskal.test(healthy_disease_2$menhinick ~ healthy_disease_2$condition)$p.value,
  Pielou = kruskal.test(healthy_disease_2$pielou_evenness ~ healthy_disease_2$condition)$p.value,
  Gini = kruskal.test(healthy_disease_2$gini_index ~ healthy_disease_2$condition)$p.value,
  Strong = kruskal.test(healthy_disease_2$strong ~ healthy_disease_2$condition)$p.value,
  Faith = kruskal.test(healthy_disease_2$faith_pd ~ healthy_disease_2$condition)$p.value)

kruskal_results <- as.data.frame(t(kruskal_results))
colnames(kruskal_results) <- c("p.value")
kruskal_results <- data.frame(Alpha_Metric = row.names(kruskal_results), kruskal_results)
kruskal_results$p.value <- round(kruskal_results$p.value, digits = 5)

kruskal_results %>% 
  add_column(p.adjusted = round(p.adjust(kruskal_results$p.value, "fdr"), digits=5), .after='p.value') %>%
  flextable() %>%
  bold(~ p.value < 0.05, 2) %>%
   bold(~ p.adjusted < 0.05, 3) %>%
  add_header_lines(values = "Results of the Kruskal-Wallis test for differentiation of different parameters across different conditions")

Results of the Kruskal-Wallis test for differentiation of different parameters across different conditions

Alpha_Metric

p.value

p.adjusted

Shannon

0

0

Chao1

0

0

Fisher

0

0

Margalef

0

0

Simpson

0

0

Menhinick

0

0

Pielou

0

0

Gini

0

0

Strong

0

0

Faith

0

0

As expected, if we mix data sets the results show significant difference between healthy and unhealthy samples for all metrics. However, this conclusion can be due to the heterogeniety of data.

CDI data

Let’s see how Clostridium difficile infection affects the alpha diversity of gut microbiome:

table(CDI$PPI_use)
## 
##  No Yes 
##  53  20
table(CDI$prior_antibiotics)
## 
##  No Yes 
##  32  41
table(CDI$response_to_treatment)
## 
## failure success 
##      10      63
table(CDI$recurrence)
## 
##  No Yes 
##  54  19
table(CDI$severe_CDI)
## 
##  No Yes 
##  68   5
#install.packages("expss")
library(expss)

cross_cases(CDI, severe_CDI, prior_antibiotics)
 prior_antibiotics 
 No   Yes 
 severe_CDI 
   No  31 37
   Yes  1 4
   #Total cases  32 41
cross_cases(CDI, severe_CDI, response_to_treatment)
 response_to_treatment 
 failure   success 
 severe_CDI 
   No  8 60
   Yes  2 3
   #Total cases  10 63
cross_cases(CDI, severe_CDI, recurrence)
 recurrence 
 No   Yes 
 severe_CDI 
   No  49 19
   Yes  5
   #Total cases  54 19
cross_cases(CDI, response_to_treatment, recurrence)
 recurrence 
 No   Yes 
 response_to_treatment 
   failure  5 5
   success  49 14
   #Total cases  54 19

Check whether increased BMI affects the alpha diversity score:

CDI$condition <- "CDI"

for (i in 1:nrow(CDI)){
  if (CDI$BMI[i] < 25){
    CDI$BMI_cat[i] <- "normal"
  } else {
    CDI$BMI_cat[i] <- "obese"
  } 
}

table(CDI$BMI_cat)
## 
## normal  obese 
##     28     45

More than half samples have BMI over 25 which classifies them in obese group. However, lets chech whether there is significant difference in alpha diversity regarding the BMI cathegory:

violin_CDI_BMI <- vector('list', length(metric))

# Use violin function
violin_CDI_BMI <- plot_violin(CDI, "BMI_cat")

#violin_CDb

grid.arrange(violin_CDI_BMI[[1]], violin_CDI_BMI[[2]], violin_CDI_BMI[[3]], violin_CDI_BMI[[4]], violin_CDI_BMI[[5]], violin_CDI_BMI[[6]], violin_CDI_BMI[[7]], violin_CDI_BMI[[8]], violin_CDI_BMI[[9]], violin_CDI_BMI[[10]], ncol=3) 

test_CDI_BMI <- list()

test_CDI_BMI <- do_wilcox_test(CDI, "BMI_cat")

test_CDI_BMI <- test_CDI_BMI %>% 
  add_column(p.adjusted = round(p.adjust(test_CDI_BMI$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for...")

test_CDI_BMI

Results of the Mann-Whitney-Wilcoxon test for...

parameter

group1

group2

p.value

p.adjusted

chao1

obese

normal

0.24259

0.61767

fisher_alpha

obese

normal

0.24707

0.61767

margalef

obese

normal

0.24707

0.61767

menhinick

obese

normal

0.24707

0.61767

faith_pd

obese

normal

0.51662

0.95703

gini_index

obese

normal

0.72254

0.95703

shannon_entropy

obese

normal

0.73950

0.95703

simpson

obese

normal

0.82605

0.95703

strong

obese

normal

0.86133

0.95703

pielou_evenness

obese

normal

0.99550

0.99550

Obese and normal samples do not diverge significantly in alpha diversity, so there is no need to discard more than half of the samples due to their increased BMI. Now lets compare CDI samples with AGP control:

CDI_check <- rbind.fill(CDI, all_healthy)

violin_CDI_healthy <- vector('list', length(metric))

# Use violin function
violin_CDI_healthy <- plot_violin(CDI_check, "condition")

#violin_CDb

grid.arrange(violin_CDI_healthy[[1]], violin_CDI_healthy[[2]], violin_CDI_healthy[[3]], violin_CDI_healthy[[4]], violin_CDI_healthy[[5]], violin_CDI_healthy[[6]], violin_CDI_healthy[[7]], violin_CDI_healthy[[8]], violin_CDI_healthy[[9]], violin_CDI_healthy[[10]], ncol=3) 

test_CDI_healthy <- list()

test_CDI_healthy <- do_wilcox_test(CDI_check, "condition")

test_CDI_healthy <- test_CDI_healthy %>% 
  add_column(p.adjusted = round(p.adjust(test_CDI_healthy$p.value, "fdr"), digits=5), .after='p.value') %>%
  arrange(p.value, parameter)  %>%
  flextable() %>% 
  bold(~ p.value < 0.05, 4) %>%
  bold(~ p.adjusted < 0.05, 5) %>%
  add_header_lines(values = "Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients")

test_CDI_healthy

Results of the Mann-Whitney-Wilcoxon test for distributions of different groups of Crhohn's disease patients

parameter

group1

group2

p.value

p.adjusted

chao1

healthy

CDI

0.00000

0.00000

faith_pd

healthy

CDI

0.00000

0.00000

fisher_alpha

healthy

CDI

0.00000

0.00000

gini_index

healthy

CDI

0.00000

0.00000

margalef

healthy

CDI

0.00000

0.00000

menhinick

healthy

CDI

0.00000

0.00000

shannon_entropy

healthy

CDI

0.00000

0.00000

simpson

healthy

CDI

0.00000

0.00000

pielou_evenness

healthy

CDI

0.00004

0.00004

strong

healthy

CDI

0.99646

0.99646

Means of all alpha diversity indices of CDI samples are significantly different from means of the healthy control population. Based on violin plots, both richness and evenness indices show lower values in CDI samples.

Short- and Longterm changes after fecal transplantation for recurrent CDI

With this data set of rnrow(CDI_FMT)` samples we will explore whether there FMT for recurrent CDI affects the improvement of alpha diversity. Samples from 4 patients were collected in multiple time points after transplantation

table(CDI_FMT$disease_state)
## 
## post-FMT  Pre-FMT 
##       88        4
table(CDI_FMT$animations_subject)
## 
## CD1 CD2 CD3 CD4 
##  33  22  13  24
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD1"] <-'subject_1'
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD2"] <-'subject_2'
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD3"] <-'subject_3'
CDI_FMT$animations_subject[CDI_FMT$animations_subject == "CD4"] <-'subject_4'
progression <- vector('list', length(metric))

for (i in 1:length(metric)){
  progression[[i]] <- CDI_FMT %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]], color= CDI_FMT$animations_subj)) +
    geom_line()+
    geom_point()+
    facet_wrap(dplyr::vars(CDI_FMT$animations_subject), ncol=2)
}

progression
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

for (i in 1:length(metric)) {
  progression[[i]] <- CDI_FMT %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]], color= CDI_FMT$animations_subj)) +
    geom_line()+
    geom_point()+
    theme(legend.position="none")
}

grid.arrange(progression[[1]], progression[[2]], progression[[3]], progression[[4]], progression[[5]], ncol=3)

grid.arrange(progression[[6]], progression[[7]], progression[[8]], progression[[9]], progression[[10]], ncol=3)

subjects <- unique(CDI_FMT$animations_subject)

data <- CDI_FMT[CDI_FMT$animations_subject == subjects[1],]
  
for (i in 1:length(metric)) {
  progression[[i]] <- data %>% ggplot(aes(x=day_since_fmt, y= .data[[metric[i]]])) +
    geom_line()+
    geom_point()
}

grid.arrange(progression[[1]], progression[[2]], progression[[3]], progression[[4]], progression[[5]], progression[[6]], progression[[7]], progression[[8]], progression[[9]], progression[[10]], ncol=3)

Even though the value is fluctuating as the time passes, we can see general trend of improvement of alpha diversity after FMT in all subjects.

Fecal transplant data analysis

From the paper: “A recent study suggests significantly lower response of CDI to FMT in patients with underlying inflammatory bowel disease (IBD)”.

The original study is looking at the changes in community composition (taxonomical) in patients after FMT. Lets examine what happens with alpha diversity as the time is passing after transplantation:

table(FMT_IBD_CDI$condition)
## 
##      CDI CDI + CD CDI + UC   Donors 
##       27        6        6        1
table(FMT_IBD_CDI$day_since_fmt)
## 
##       -1       28        7 NA-Donor 
##       14       11       14        1
violin_trans_2 <- vector('list', length(metric))

for (i in 1:length(metric)){
  mean_line <- FMT_IBD_CDI %>% dplyr::group_by(condition, day_since_fmt) %>% dplyr::summarise(grp_mean = mean(.data[[metric[i]]]))

  violin_trans_2[[i]] <- FMT_IBD_CDI %>%  
    mutate(across(day_since_fmt, factor, levels=c("-1","7","28","NA-Donor"))) %>% 
    ggplot(aes(x = .data[[metric[i]]], y = day_since_fmt, color = day_since_fmt, fill = day_since_fmt)) +
    geom_violin()+
    geom_boxplot(width=0.1, color="grey", alpha=0.2) +
    geom_vline(data = mean_line, aes(xintercept = grp_mean, color = day_since_fmt), linetype = "dashed")+ 
    labs(x = metric[i])+
    ylab("") +
    facet_wrap(dplyr::vars(condition), nrow=1)+
    theme(legend.position="none") 
  
  if(metric[i] != "shannon_entropy" & metric[i] !="strong" & metric[i] != "gini_index"  &  metric[i] != "menhinick"){
     violin_trans_2 [[i]] <- violin_trans_2 [[i]] + 
       scale_x_continuous(trans = 'log10') +
       xlab(paste(metric[i], "(log10)", sep = " ")) 
  }
}

#plots for Shannon entropy
violin_trans_2
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

These plots show consistent trend of improvement of alpha diversity in 7th and 28th day after fecal microbiota transplantation. For all alpha indexes we can see that the diversity is increasing toward donor’s mean value. However, samples with underlying CD show a trend of diversity decrease in 28th day compared to 7th day. This shows that IBD can decrease the efficiency of FMT in CDI recepients.

cond <- c("CDI + UC", "CDI", "CDI + CD")
test_CDI_trans <- list()
table <- list()

for (i in 1:length(cond)){
  FMT_IBD_CDI_1 <- FMT_IBD_CDI %>%
    filter(condition == cond[i])
  
  test_CDI_trans <- do_wilcox_test(FMT_IBD_CDI_1, "day_since_fmt")

  table <- test_CDI_trans %>%
    add_column(p.adjusted = round(p.adjust(test_CDI_trans$p.value, "fdr"), digits=5), .after='p.value') %>%
    flextable() %>%
    bold(~ p.value < 0.05, 4) %>%
    bold(~ p.adjusted < 0.05, 5) %>%
    add_header_lines(values = paste("Results of the Mann-Whitney-Wilcoxon test for condition:", cond[i], sep = " "))
   
 print(table)
 
 test_CDI_trans <- list()
}
## a flextable object.
## col_keys: `parameter`, `group1`, `group2`, `p.value`, `p.adjusted` 
## header has 2 row(s) 
## body has 30 row(s) 
## original dataset sample: 
##   parameter group1 group2 p.value p.adjusted
## 1     chao1     28     -1 0.33333    0.62499
## 2     chao1      7     -1 0.66667    0.74074
## 3     chao1      7     28 0.33333    0.62499
## 4  margalef     28     -1 0.33333    0.62499
## 5  margalef      7     -1 0.33333    0.62499
## a flextable object.
## col_keys: `parameter`, `group1`, `group2`, `p.value`, `p.adjusted` 
## header has 2 row(s) 
## body has 30 row(s) 
## original dataset sample: 
##   parameter group1 group2 p.value p.adjusted
## 1     chao1     28     -1 0.00010    0.00030
## 2     chao1      7     -1 0.00001    0.00008
## 3     chao1      7     28 0.60088    0.72106
## 4  margalef     28     -1 0.00010    0.00030
## 5  margalef      7     -1 0.00001    0.00008
## a flextable object.
## col_keys: `parameter`, `group1`, `group2`, `p.value`, `p.adjusted` 
## header has 2 row(s) 
## body has 30 row(s) 
## original dataset sample: 
##   parameter group1 group2 p.value p.adjusted
## 1     chao1     28     -1 0.66667    0.68966
## 2     chao1      7     -1 0.33333    0.45454
## 3     chao1      7     28 0.33333    0.45454
## 4  margalef     28     -1 0.66667    0.68966
## 5  margalef      7     -1 0.33333    0.45454

From these three tables we can see that only for CDI patients without underlying IBD show signifficant improvement after FMT.

Conclusions

Based on previous analysis we can draw following conclusions:

Session information

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.10.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=es_ES.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] expss_0.11.4          maditr_0.8.3          table1_1.4.3         
##  [4] tableone_0.13.2       here_1.0.1            ggplotify_0.1.0      
##  [7] RColorBrewer_1.1-3    ComplexHeatmap_2.10.0 tibble_3.2.1         
## [10] flextable_0.9.1       dplyr_1.1.1           purrr_1.0.1          
## [13] plyr_1.8.8            gridExtra_2.3         ggplot2_3.4.2        
## [16] stringr_1.5.0         data.table_1.14.8     readr_2.1.4          
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.1-0        rjson_0.2.21            class_7.3-20           
##   [4] ellipsis_0.3.2          rprojroot_2.0.2         htmlTable_2.4.0        
##   [7] circlize_0.4.15         GlobalOptions_0.1.2     proxy_0.4-26           
##  [10] clue_0.3-64             httpcode_0.3.0          rstudioapi_0.14        
##  [13] farver_2.1.1            fansi_1.0.4             xml2_1.3.3             
##  [16] codetools_0.2-18        splines_4.1.2           doParallel_1.0.17      
##  [19] cachem_1.0.7            knitr_1.42              Formula_1.2-4          
##  [22] jsonlite_1.8.4          broom_1.0.4             cluster_2.1.2          
##  [25] png_0.1-7               shiny_1.7.4             compiler_4.1.2         
##  [28] backports_1.4.1         Matrix_1.4-0            fastmap_1.1.1          
##  [31] survey_4.1-1            cli_3.6.1               later_1.3.0            
##  [34] htmltools_0.5.5         tools_4.1.2             gtable_0.3.3           
##  [37] glue_1.6.2              Rcpp_1.0.10             jquerylib_0.1.4        
##  [40] fontquiver_0.2.1        vctrs_0.6.1             crul_1.3               
##  [43] iterators_1.0.14        xfun_0.38               mime_0.12              
##  [46] lifecycle_1.0.3         zoo_1.8-9               scales_1.2.1           
##  [49] ragg_1.2.1              hms_1.1.3               promises_1.2.0.1       
##  [52] parallel_4.1.2          fontLiberation_0.1.0    yaml_2.3.7             
##  [55] curl_5.0.0              gdtools_0.3.3           yulab.utils_0.0.6      
##  [58] sass_0.4.5              labelled_2.11.0         stringi_1.7.12         
##  [61] fontBitstreamVera_0.1.1 highr_0.10              S4Vectors_0.32.4       
##  [64] foreach_1.5.2           checkmate_2.0.0         e1071_1.7-9            
##  [67] BiocGenerics_0.40.0     zip_2.2.0               shape_1.4.6            
##  [70] rlang_1.1.0             pkgconfig_2.0.3         systemfonts_1.0.4      
##  [73] matrixStats_0.61.0      evaluate_0.20           lattice_0.20-45        
##  [76] htmlwidgets_1.6.2       labeling_0.4.2          tidyselect_1.2.0       
##  [79] magrittr_2.0.3          R6_2.5.1                IRanges_2.28.0         
##  [82] generics_0.1.3          DBI_1.1.3               pillar_1.9.0           
##  [85] haven_2.5.2             withr_2.5.0             survival_3.2-13        
##  [88] crayon_1.5.2            gfonts_0.2.0            uuid_1.1-0             
##  [91] utf8_1.2.3              tzdb_0.3.0              rmarkdown_2.21         
##  [94] officer_0.6.2           GetoptLong_1.0.5        forcats_1.0.0          
##  [97] digest_0.6.31           xtable_1.8-4            tidyr_1.3.0            
## [100] httpuv_1.6.5            gridGraphics_0.5-1      textshaping_0.3.6      
## [103] openssl_2.0.6           stats4_4.1.2            munsell_0.5.0          
## [106] bslib_0.4.2             mitools_2.4             askpass_1.1